# -*- coding: utf-8 -*-
import scrapy
import re
import json
from ..items import FemaleCrawlItem
import requests
import os


class LuckySpider(scrapy.Spider):
    """Spider that walks the paged user-search API and scrapes profile pages.

    Flow:
      1. ``start_requests`` requests page 1 of the JSON search API.
      2. ``get_female_data`` yields one request per listed user and, as long
         as the page contained users, a request for the next page.
      3. ``parse_female_Page`` turns a profile HTML page into a
         ``FemaleCrawlItem`` and stores the profile picture on disk.
    """

    name = 'lucky'
    allowed_domains = ['www.7799520.com']

    # JSON search API; ``{}`` is the page number (the crawl starts at 1).
    search_url_template = (
        "http://www.7799520.com/api/user/pc/list/search"
        "?gender=2&marry=1&page={}"
    )
    # Profile page; ``{}`` is the ``userid`` taken from the search API.
    user_page_template = "http://www.7799520.com/user/{}.html"
    # Where the profile picture is stored; ``{}`` is the (sanitised) nickname.
    picture_path_template = r'D:\学习文件\python高阶编程\爬虫\我主良缘\female_picture\{}.jpg'
    # Seconds to wait for the picture download before giving up.
    picture_timeout = 10

    # (item key, XPath) pairs for fields that are a single text node.
    # The order is the order in which the item is populated.
    _TEXT_FIELDS = (
        # age
        ("age", '//span[@class="age s1"]/text()'),
        # sex
        ("sex", '//p[@class="f18 c3e p2"]/span[2]/text()'),
        # marital status
        ("marrage", '//span[@class="marrystatus"]/text()'),
        # height
        ("heigh", '//span[@class="height"]/text()'),
        # education
        ("education", '//span[@class="education"]/text()'),
        # current residence
        ("local", '//ul[@class="clearfix user-info"]/li[1]/span/text()'),
        # native place
        ("native", '//ul[@class="clearfix user-info"]/li[2]/span/text()'),
        # constellation
        ("constellation", '//ul[@class="clearfix user-info"]/li[3]/span/text()'),
        # zodiac
        ("zodiac", '//ul[@class="clearfix user-info"]/li[4]/span/text()'),
        # blood type
        ("blood_type", '//ul[@class="clearfix user-info"]/li[6]/span/text()'),
        # job
        ("job", '//ul[@class="clearfix user-info"]/li[7]/span/text()'),
        # income
        ("income", '//ul[@class="clearfix user-info"]/li[8]/span/text()'),
        # personal monologue
        ("monologue", '//div[@class="body no-border"]/p/text()'),
    )

    def start_requests(self):
        """Yield the request for the first page of the search API."""
        yield scrapy.Request(
            url=self.search_url_template.format(1),
            callback=self.get_female_data,
            dont_filter=True,
        )

    def get_female_data(self, response):
        """Parse one search-API page.

        Yields a profile-page request for every listed user and, only when
        the page actually contained users, a request for the following page.
        An empty, malformed or non-JSON page ends the pagination instead of
        requesting further pages forever.
        """
        try:
            payload = json.loads(response.text)
        except ValueError:
            self.logger.error("Search API returned non-JSON data: %s", response.url)
            return

        data = payload.get('data') if isinstance(payload, dict) else None
        users = data.get('list') if isinstance(data, dict) else None
        if not users:
            # No users on this page: we ran past the last page (or the API
            # reported an error), so stop paginating.
            self.logger.info("No users listed at %s; stopping pagination", response.url)
            return

        for user in users:
            userid = user.get('userid') if isinstance(user, dict) else None
            if userid is None:
                continue
            yield scrapy.Request(
                url=self.user_page_template.format(userid),
                callback=self.parse_female_Page,
                dont_filter=True,
            )

        # Derive the next page from the URL that was actually fetched.
        page_match = re.search(r'[?&]page=(\d+)', response.url)
        if page_match is None:
            self.logger.warning("No page number in %s; stopping pagination", response.url)
            return
        yield scrapy.Request(
            url=self.search_url_template.format(int(page_match.group(1)) + 1),
            callback=self.get_female_data,
            dont_filter=True,
        )

    def parse_female_Page(self, response):
        """Build a ``FemaleCrawlItem`` from a profile page.

        A field whose node is missing from the page is stored as ``None``
        rather than aborting the whole item with ``IndexError``.
        """
        item = FemaleCrawlItem()
        # user name
        id_name = response.xpath('//span[@class="nick c3e"]/text()').extract_first()
        item["id_name"] = id_name
        # local path of the downloaded picture (None when unavailable)
        item["picture_url"] = self._save_picture(response, id_name)
        for key, xpath in self._TEXT_FIELDS:
            item[key] = response.xpath(xpath).extract_first()
        # partner requirements
        item["select"] = ' '.join(
            response.xpath('//ul[@class="cm-g info-list"]/li/text()').extract()
        ).replace('\n', '')
        # hobbies / interests
        item["instrest"] = ' '.join(
            response.xpath('//dd[@class="content"]/span/text()').extract()
        )
        yield item

    def _save_picture(self, response, id_name):
        """Store the profile picture on disk and return its local path.

        Returns ``None`` when there is no user name, no picture URL, or the
        download / write fails. An already existing file is reused without
        issuing another download.
        """
        if not id_name:
            return None
        # Replace characters that are not allowed in file names (and that
        # would otherwise let the nickname alter the target directory).
        safe_name = re.sub(r'[\\/:*?"<>|]', '_', id_name)
        path = self.picture_path_template.format(safe_name)
        if os.path.exists(path):
            return path

        img_url = response.xpath('//li[@class=""]/img/@src').extract_first()
        if not img_url:
            self.logger.warning("No picture found on %s", response.url)
            return None

        # NOTE(review): requests.get blocks Scrapy's event loop while the
        # picture downloads; consider moving this to Scrapy's ImagesPipeline.
        try:
            reply = requests.get(response.urljoin(img_url), timeout=self.picture_timeout)
            reply.raise_for_status()
        except requests.RequestException as exc:
            self.logger.warning("Picture download failed for %s: %s", response.url, exc)
            return None

        try:
            directory = os.path.dirname(path)
            if directory:
                os.makedirs(directory, exist_ok=True)
            with open(path, 'wb') as picture_file:
                picture_file.write(reply.content)
        except OSError as exc:
            self.logger.warning("Could not write picture %s: %s", path, exc)
            return None
        return path


